Learning materials

Load the packages and the data

pacman::p_load(
  dplyr,
  tidyverse,
  readxl,
  ggplot2,
  ggfortify,
  ggthemes,
  ggsci,
  ggalluvial,
  ggpubr)

# Folder that holds the raw colony-count workbook (macOS mount point).
# The same folder on Windows was reached with:
# setwd("D:PhD/01_Data/03_Vitro/02_Identification of glycan utilizing microbe") # Windows
data_dir <- "/Volumes/Yiming_Wang/PhD/01_Data/03_Vitro/02_Identification of glycan utilizing microbe" # Mac
setwd(data_dir)

# Raw colony counts. The code below reads the columns Sample_ID, Broth,
# Replicate, Colony_size and Cells from this table.
colony_workbook <- "colony counting.xlsx"
df_all <- read_excel(colony_workbook)

# Colour palette ----
# scales supplies show_col(), used below to preview the colours.
library("scales")

# Nine colours from the ggsci "springfield" (Simpsons) palette at alpha = 0.7
# (the trailing "B2" in each hex code is the alpha channel).
mypal <- pal_simpsons("springfield", alpha = 0.7)(9)
mypal
## [1] "#FED439B2" "#709AE1B2" "#8A9197B2" "#D2AF81B2" "#FD7446B2" "#D5E4A2B2"
## [7] "#197EC0B2" "#F05C3BB2" "#46732EB2"

# Draw the palette as a grid of labelled swatches.
show_col(mypal)

Shape the data

# Shape the data ----
# Average the replicate counts so that each Sample_ID contributes one value
# (mean_per_sample) per Broth and Colony_size.
df_shape <- df_all %>%
  select(Sample_ID, Broth, Replicate, Colony_size, Cells) %>%
  group_by(Sample_ID, Broth, Colony_size) %>%
  summarise(mean_per_sample = mean(Cells)) %>%
  ungroup()

# One table per colony size class. These are left ungrouped: the Wilcoxon tests
# below take Broth from the formula, so a trailing group_by(Broth) had no
# effect other than leaving stale grouping metadata on the tibbles.
df_shape_big <- df_shape %>%
  filter(Colony_size == "Big")

df_shape_medium <- df_shape %>%
  filter(Colony_size == "Medium")

df_shape_small <- df_shape %>%
  filter(Colony_size == "Small")

# Stats ----

# Two-sided Wilcoxon rank-sum test of the per-sample mean counts between the
# two levels of Broth, with continuity correction and a 95% confidence
# interval for the location shift.
#
# data: a table with a numeric column `mean_per_sample` and a two-level
#       column `Broth`.
# Returns the "htest" object produced by wilcox.test().
#
# `paired` is deliberately not passed: the formula interface only supports the
# unpaired test (the default), and recent R releases (>= 4.4.0) stop with
# "cannot use 'paired' in formula method" when it is supplied at all.
# The formula uses bare column names so that `data =` is what supplies them.
compare_broths <- function(data) {
  wilcox.test(
    mean_per_sample ~ Broth,
    data = data,
    correct = TRUE,
    conf.int = TRUE,
    conf.level = 0.95,
    exact = FALSE
  )
}

df_stats_big <- compare_broths(df_shape_big)

df_stats_medium <- compare_broths(df_shape_medium)

df_stats_small <- compare_broths(df_shape_small)
df_stats_big
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  mean_per_sample by Broth
## W = 0, p-value = 0.03038
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -1061500000  -476500000
## sample estimates:
## difference in location 
##              -7.69e+08
df_stats_medium
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  mean_per_sample by Broth
## W = 16, p-value = 0.0294
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  140500000 209000000
## sample estimates:
## difference in location 
##              174750000
df_stats_small
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  mean_per_sample by Broth
## W = 16, p-value = 0.03038
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  2.3e+07 8.9e+07
## sample estimates:
## difference in location 
##               40414872
# Group means: first average the replicates within each sample, then average
# those per-sample means within each Broth x Colony_size cell.
# NOTE(review): df_figure is assigned again in the "Bar chart" section below
# before this value is read anywhere in the visible script.
per_sample_means <- df_all %>%
  select(Sample_ID, Broth, Replicate, Colony_size, Cells) %>%
  group_by(Sample_ID, Broth, Colony_size) %>%
  summarise(mean_per_sample = mean(Cells))

df_figure <- per_sample_means %>%
  group_by(Broth, Colony_size) %>%
  summarise(mean_cells = mean(mean_per_sample))

Bar chart

# Shape data
# Per-sample mean cell count for every Broth x Colony_size combination, plus
# two display columns used by the bar charts:
#   Broth  - factor ordered "No glycan", "Glycan"
#   Broth1 - the matching axis label ("mBasal", "mBasal + 5% 2'-FL")
df_figure <- df_all %>%
  select(Sample_ID, Broth, Replicate, Colony_size, Cells) %>%
  group_by(Sample_ID, Broth, Colony_size) %>%
  summarise(mean_per_sample = mean(Cells)) %>%
  # summarise() leaves the result grouped by Sample_ID and Broth. Drop that
  # grouping before recoding Broth, so a grouping column is not rewritten
  # in place and no stale grouping leaks into the plotting code.
  ungroup() %>%
  mutate(
    Broth = factor(Broth, levels = c("No glycan", "Glycan")),
    # Explicit levels keep "mBasal" first on the x axis and tie it to the first
    # fill colour, instead of relying on alphabetical ordering of strings.
    Broth1 = factor(
      Broth,
      levels = c("No glycan", "Glycan"),
      labels = c("mBasal", "mBasal + 5% 2'-FL")
    )
  )

# Bar charts ----
# One panel per colony size class. The three panels share every layer and
# theme setting, so they are built by a single helper instead of three
# copy-pasted ggplot chains.

# Format a p-value for the plot annotation, e.g. 0.03038 -> "p = 0.030".
format_p_label <- function(p_value) {
  sprintf("p = %.3f", p_value)
}

# Build the bar + points chart for one colony size class.
#
# data:       per-sample table with columns Colony_size, Broth1 and
#             mean_per_sample (df_figure).
# size_class: value of Colony_size to plot ("Big", "Medium" or "Small").
# species:    organism name shown in the y-axis title.
# p_label:    text drawn at the top centre of the panel.
# point_size: size of the per-sample points.
# Returns a ggplot object.
plot_colony_bar <- function(data, size_class, species, p_label,
                            point_size = 4) {
  # One row per sample: these are the individual points.
  per_sample <- data %>%
    ungroup() %>%
    filter(Colony_size == size_class) %>%
    mutate(log_cfu = log10(mean_per_sample))

  # One row per broth: bar height is log10 of the arithmetic mean of the
  # per-sample values (not the mean of the log values).
  per_broth <- per_sample %>%
    group_by(Broth1) %>%
    summarise(log_cfu = log10(mean(mean_per_sample))) %>%
    ungroup()

  # Plotmath title: "<species> (log" with subscript 10, then "CFU/mL)".
  y_title <- bquote(paste(.(paste0(species, " (log"))["10"] * "CFU/mL)"))

  ggplot(mapping = aes(x = Broth1, y = log_cfu, fill = Broth1)) +
    geom_col(data = per_broth, width = 0.5) +
    geom_point(
      data = per_sample,
      size = point_size, color = "black", shape = 21, alpha = 0.5
    ) +
    theme_bw() +
    labs(x = "", y = y_title, caption = "", title = "") +
    theme(
      legend.position = "none",
      legend.title = element_blank(),
      axis.text.x = element_text(colour = "black", face = "plain", size = 14),
      axis.text.y = element_text(colour = "black", face = "plain", size = 14),
      axis.title.x = element_text(margin = margin(t = 10)),
      axis.title.y = element_text(margin = margin(r = 10), size = 16),
      axis.title = element_text(face = "plain", size = 10),
      plot.title = element_text(size = 10, hjust = 0.5),
      panel.grid = element_blank(),
      # Black frame around the panel; `size` is the border line width.
      panel.border = element_rect(color = "black", fill = NA, size = 1),
      legend.background = element_rect(color = "transparent"),
      aspect.ratio = 1,
      plot.background = element_rect(fill = "transparent",
                                     colour = "transparent")
    ) +
    # First colour -> first level of Broth1, second colour -> second level.
    scale_fill_manual(values = c("#b86877", "#70a1a7")) +
    # Zoom to log10 6-10 without dropping data outside the window.
    coord_cartesian(ylim = c(6, 10)) +
    scale_y_continuous(breaks = seq(6, 10, 1)) +
    annotate("text", x = 1.5, y = 10, label = p_label, hjust = 0.5, size = 5.5)
}

# The p-value labels come straight from the Wilcoxon tests computed in the
# Stats section, so the figures cannot drift from the reported statistics.

# Big colony
figure_big <- plot_colony_bar(
  df_figure, "Big", "E.coli",
  p_label = format_p_label(df_stats_big$p.value)
)

figure_big

# Medium colony
figure_medium <- plot_colony_bar(
  df_figure, "Medium", "E.faecalis",
  p_label = format_p_label(df_stats_medium$p.value)
)

figure_medium

# Small colony (smaller points, as before). The former bare geom_line() layer
# had neither data nor a mapping and drew nothing, so it has been dropped.
figure_small <- plot_colony_bar(
  df_figure, "Small", "L.murinus/reuteri",
  p_label = format_p_label(df_stats_small$p.value),
  point_size = 3
)

figure_small

# Alluvial graph ----

# Composition table: one row per Broth x Colony_size with
#   N                - sum of the per-sample means
#   p                - share of N within its Broth
#   label            - p as a rounded percentage string
#   growth_condition - display name of the broth
#   Colony           - organism assigned to the colony size class
df_shape2 <- df_shape %>%
  select(Broth, Colony_size, mean_per_sample) %>%
  group_by(Broth, Colony_size) %>%
  summarise(N = sum(mean_per_sample)) %>%
  ungroup() %>%
  group_by(Broth) %>%
  mutate(p = N / sum(N)) %>%
  ungroup() %>%
  mutate(
    label = ifelse(p > 0, paste0(round(p * 100, 0), "%"), NA),
    growth_condition = case_when(
      Broth == "No glycan" ~ "mBasal",
      Broth == "Glycan" ~ "mBasal + 5% 2'-FL"
    ),
    Colony = ifelse(
      Colony_size == "Big", "E.coli",
      ifelse(Colony_size == "Medium", "E.faecalis", "L.murinus/reuteri")
    )
  )

# Colony classes ordered by decreasing share in the "No glycan" broth.
# Colony is kept next to Colony_size because the stacked bar below reads
# `levels$Colony`; with only Colony_size selected that lookup returned NULL.
# NOTE(review): the name `levels` shadows base::levels(); kept unchanged
# because later code refers to it.
levels <- df_shape2 %>%
  filter(Broth == "No glycan") %>%
  arrange(desc(p)) %>%
  select(Colony_size, Colony)

# Sanity check: total N per growth condition.
check <- df_shape2 %>%
  group_by(growth_condition) %>%
  summarise(n = sum(N))
library(RColorBrewer)

# Stretch the 8-colour "Set2" palette to 14 interpolated colours.
nb.cols <- 14
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)

# Stacking and legend order: organisms by decreasing share in the "No glycan"
# broth. This is computed from df_shape2 directly; the previous
# `levels$Colony` lookup hit a column that `levels` did not contain, so
# factor() received NULL levels and every fill value became NA.
colony_order <- df_shape2 %>%
  filter(Broth == "No glycan") %>%
  arrange(desc(p)) %>%
  pull(Colony)

# Stacked bar of N per growth condition, with percentage labels centred in
# each segment.
ggplot(
  data = df_shape2,
  aes(
    x = growth_condition,
    y = N,
    fill = factor(Colony, levels = colony_order),
    label = label
  )
) +
  geom_bar(stat = "identity") +
  geom_text(size = 3, position = position_stack(vjust = 0.5), color = "white") +
  theme_pubclean() +
  scale_fill_manual(values = mycolors) +
  theme(
    legend.position = "left",
    legend.title = element_blank(),
    axis.title = element_blank(),
    axis.line.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

# Stacked bar in which each broth is drawn as its own layer, so that
# reorder(Colony, -N) is evaluated on that broth's rows only.
no_glycan_rows <- filter(df_shape2, Broth == "No glycan")
glycan_rows <- filter(df_shape2, Broth == "Glycan")

# White percentage labels centred inside each stacked segment.
segment_label_position <- position_stack(vjust = 0.5)

ggplot(
  df_shape2,
  aes(x = growth_condition, y = N, fill = reorder(Colony, -N), label = label)
) +
  geom_col(data = no_glycan_rows, position = "stack") +
  geom_col(data = glycan_rows, position = "stack") +
  guides(fill = guide_legend("ordering")) +
  geom_text(
    data = no_glycan_rows,
    size = 3, position = segment_label_position, color = "white"
  ) +
  geom_text(
    data = glycan_rows,
    size = 3, position = segment_label_position, color = "white"
  ) +
  theme_pubclean() +
  scale_fill_manual(values = mycolors) +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    axis.title = element_blank(),
    axis.line.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

# Alluvial plot of the share (p) of each organism in the two growth conditions.

# Panel styling: black frame, no grid, legend underneath, square aspect.
alluvial_theme <- theme_bw() +
  theme(
    plot.title = element_text(size = 10, face = "plain", hjust = 0.5),
    legend.position = "bottom",
    axis.title = element_blank(),
    axis.line.y = element_blank(),
    axis.text.x = element_text(colour = "black", face = "plain", size = 14),
    axis.text.y = element_text(colour = "black", face = "plain", size = 14),
    axis.ticks.y = element_blank(),
    legend.title = element_text(face = "bold", size = 14),
    legend.text = element_text(colour = "black", face = "plain", size = 10),
    legend.key.size = unit(0.4, "cm"),
    panel.grid = element_blank(),
    # element_rect is often used for backgrounds and borders
    panel.border = element_rect(color = "black", fill = NA, size = 1),
    legend.background = element_rect(color = "transparent"),
    aspect.ratio = 1,
    plot.background = element_rect(fill = "transparent", colour = "transparent")
  )

# Hand-placed percentage labels (x = 1 is the first condition on the axis,
# x = 2 the second). They mirror the `label` column of df_shape2 printed below.
share_labels <- data.frame(
  x = c(1, 0.920, 1.090, 2, 2, 2),
  y = c(0.525, 0.029, 0.005, 0.67, 0.25, 0.08),
  text = c("95%", "4%", "1%", "66%", "18%", "16%"),
  size = c(4.5, 4, 4, 4.5, 4.5, 4.5),
  stringsAsFactors = FALSE
)

ggplot(
  df_shape2,
  aes(
    x = stringr::str_wrap(growth_condition, 20),
    y = p,
    stratum = reorder(Colony, -p),
    alluvium = reorder(Colony, -p),
    fill = reorder(Colony, -p),
    label = label
  )
) +
  geom_stratum(color = "white", decreasing = FALSE, alpha = 0.7) + # bar
  geom_flow(aes(fill = Colony), decreasing = FALSE) +
  geom_alluvium(decreasing = FALSE, alpha = 0) +
  scale_fill_manual(values = c("#D2AF81B2", "#709AE1B2", "#FD7446B2")) + # "#94C47D","#F2B342", "#F4E6AA"
  alluvial_theme +
  # geom_text(data = df_shape2 %>% filter(Broth == "No glycan"),size=3,position = position_stack(vjust=0.5),color="grey20")+
  # geom_text(data = df_shape2 %>% filter(Broth == "Glycan"),size=3,position = position_stack(vjust=0.5),color="grey20")+
  labs(title = "") +
  guides(fill = guide_legend(title = "")) +
  scale_y_continuous(labels = scales::percent) +
  annotate(
    "text",
    x = share_labels$x,
    y = share_labels$y,
    label = share_labels$text,
    hjust = 0.5,
    size = share_labels$size
  )

df_shape2
## # A tibble: 6 × 7
##   Broth     Colony_size          N       p label growth_condition  Colony       
##   <chr>     <chr>            <dbl>   <dbl> <chr> <chr>             <chr>        
## 1 Glycan    Big          202000000 0.158   16%   mBasal + 5% 2'-FL E.coli       
## 2 Glycan    Medium       844000000 0.659   66%   mBasal + 5% 2'-FL E.faecalis   
## 3 Glycan    Small        234500000 0.183   18%   mBasal + 5% 2'-FL L.murinus/re…
## 4 No glycan Big         3378000000 0.950   95%   mBasal            E.coli       
## 5 No glycan Medium       145000000 0.0408  4%    mBasal            E.faecalis   
## 6 No glycan Small         33000000 0.00928 1%    mBasal            L.murinus/re…